library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggridges)

Load the Weather Data

# Pull the 2017 daily PRCP, TMIN and TMAX records for three monitoring
# stations, attach a readable station label, and rescale the temperatures.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31") %>%
  mutate(
    # Translate each station id into the label used by the plots below.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"),
    # Divide by 10 (presumably the raw values are tenths of a degree C;
    # TODO confirm against the data source documentation).
    tmin = tmin / 10,
    tmax = tmax / 10) %>%
  # Put the label and id first; all other columns keep their current order.
  select(name, id, everything())
## Registered S3 method overwritten by 'hoardr':
##   method           from
##   print.cache_info httr
## using cached file: ~/Library/Caches/R/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2022-09-03 15:53:33 (8.394)
## file min/max dates: 1869-01-01 / 2022-09-30
## using cached file: ~/Library/Caches/R/noaa_ghcnd/USC00519397.dly
## date created (size, mb): 2022-09-03 15:53:37 (1.697)
## file min/max dates: 1965-01-01 / 2020-02-29
## using cached file: ~/Library/Caches/R/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2022-09-03 15:53:39 (0.949)
## file min/max dates: 1999-09-01 / 2022-09-30
# Print the tibble to check its dimensions, column types and first rows.
weather_df
## # A tibble: 1,095 × 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

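What the R chunk did: it pulled the 2017 daily PRCP, TMIN, and TMAX records for three monitoring stations, added a readable “name” for each station id, divided “tmin” and “tmax” by 10, and moved “name” and “id” to the front of the data frame.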

Scatter Plots

# Basic scatterplot: daily maximum temperature against daily minimum.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax)
) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

Typically, when we use ggplot, we:

  1. choose the data frame
  2. define the aesthetic mappings with aes()
  3. choose the geometries (geoms)

New approach, same graph.

  • The code below could be used instead to produce the same figure. Using this style can be helpful if you want to do some pre-processing before making your plot but don’t want to save the intermediate data.
# Same scatterplot, with the data frame piped into ggplot() as its data.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

Also

We can also save the output of ggplot() to an object that can be printed and modified later.

# Save the plot skeleton (data plus aesthetic mappings, no geoms yet) so that
# layers can be added to it later.
plot_weather <-
  weather_df %>%
  ggplot(aes(x = tmin, y = tmax))

# Adding a geom to the saved object draws the scatterplot.
plot_weather + geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

Advanced Scatterplot

color and geom_smooth

# Color is mapped in the global aes(), so both the points and the smooth
# curves are drawn per station name.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax, color = name)
) +
  geom_point() +
  # se = FALSE draws the smooth curve without its standard-error band.
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

How did we make it fancy:

  • color = cat_var colors the points by a categorical variable
  • geom_smooth is an additional geometry that can be added to the plot; it generates a smooth trajectory through the data

# Color is mapped only inside geom_point(), so geom_smooth() sees no grouping
# and fits a single curve to all of the data.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax)
) +
  geom_point(mapping = aes(color = name)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

In this R chunk, color only applies to geom_point. Color is not defined in the general aesthetic mappings, so geom_smooth generates only one smooth line.

alpha in geom_point

# Semi-transparent points plus a smooth curve, one panel per station.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax, color = name)
) +
  # A fixed alpha (set outside aes()) applies to every point.
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  # No row variable (.); one column of panels per value of name.
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

What we added:

  • facet_grid(. ~ col_var) the . means the row variable is not defined here. The column variable is defined (~) as “name”.
  • alpha = 0.5 in geom_point changes the transparency level

alpha in global/general setting

# Here alpha is an aesthetic mapped to tmin in the global aes(), rather than a
# fixed value set inside a geom.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax, alpha = tmin, color = name)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

What we changed:

  • alpha = tmin in aes() gives a transparency gradient in the plot: points with a smaller “tmin” are more transparent

New plot: Sizing the data point based on a variable

We sized the data points based on the “prcp” variable via geom_point(aes(size = prcp)): the higher the “prcp”, the bigger the point shown on the graph.

# Maximum temperature over the year, one panel per station, with point size
# mapped to precipitation.
weather_df %>%
  ggplot(mapping = aes(x = date, y = tmax, color = name)) +
  # size is mapped (inside aes()); alpha is a fixed setting (outside aes()).
  geom_point(mapping = aes(size = prcp), alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

Some Notes

We can have whatever geoms we want. The number of geoms is not limited.

Having one geom.
# A plot with a single geom: only the smooth curves, no points.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

Having two geoms.
# A plot with two geoms layered together: points, then smooth curves on top.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

We can have a neat geom.

geom_hex()

The geom_hex() shows the data as hexagons.

# Hexagonal bins of the (tmin, tmax) pairs instead of individual points.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_hex()
## Warning: Removed 15 rows containing non-finite values (stat_binhex).

Or geom_bin2d()

The geom_bin2d() shows as squares.

# Rectangular 2-D bins of the (tmin, tmax) pairs.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_bin2d()
## Warning: Removed 15 rows containing non-finite values (stat_bin2d).

Or something that looks like an elevation map
# 2-D density contours with the raw points drawn faintly on top.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_density2d() +
  geom_point(alpha = 0.3)
## Warning: Removed 15 rows containing non-finite values (stat_density2d).
## Warning: Removed 15 rows containing missing values (geom_point).

Univariate Plots

Histograms

# Histogram of tmin over all stations, using the default binning.
weather_df %>%
  ggplot(mapping = aes(x = tmin)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

Add color in histogram.

# Histogram of tmin with the color aesthetic mapped to the station name.
weather_df %>%
  ggplot(mapping = aes(x = tmin, color = name)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

This graph looks weird, so we want to change it:

  • fill the bars with color by using fill = name instead of color = name
  • change the histograms’ positions so that they sit side by side, via position = "dodge"
# Filled histogram with the stations' bars dodged instead of the default
# position.
weather_df %>%
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_histogram(position = "dodge")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

We can also change the position by separating the graph into 3 graphs, where each graph contains histograms for one location only.

# One histogram panel per station instead of overlapping bars.
weather_df %>%
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_histogram() +
  facet_grid(. ~ name)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 15 rows containing non-finite values (stat_bin).

New geometry geom_density

geom_density smooths out the edges of the histograms. It shows the overall shape of the distribution, but we lose those little bumps.

# Density curves of tmin per station; alpha keeps overlapping fills visible.
weather_df %>%
  ggplot(mapping = aes(x = tmin, fill = name)) +
  geom_density(alpha = 0.4)
## Warning: Removed 15 rows containing non-finite values (stat_density).

Boxplots

# A single boxplot of tmin; no x aesthetic, so the data are not split.
weather_df %>%
  ggplot(mapping = aes(y = tmin)) +
  geom_boxplot()
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).

We stratify the data points based on locations (“name”).

# One boxplot of tmin per station, placed along the x axis by name.
weather_df %>%
  ggplot(mapping = aes(x = name, y = tmin)) +
  geom_boxplot()
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).

Trendy Plots

Violin Plot
# Violin plot of tmin per station with the median of each group overlaid.
weather_df %>%
  ggplot(mapping = aes(x = name, y = tmin, fill = name)) +
  geom_violin(alpha = 0.5) +
  # Only a central summary function is given (no fun.min / fun.max).
  stat_summary(fun = "median")
## Warning: Removed 15 rows containing non-finite values (stat_ydensity).
## Warning: Removed 15 rows containing non-finite values (stat_summary).
## Warning: Removed 3 rows containing missing values (geom_segment).

If you turn your head 90 degrees to the side, the violin plot shows you the distribution.

We can add statistic features/functions in the plot by using stat_summary(fun = "median"). In this case we added one of the summary statistics functions, the median.